* Project paths and session setup.
* All paths use forward slashes: Stata accepts them on every platform, whereas
* a backslash is only a path separator on Windows.
global username = c(username)
global dropbox "/Users/${username}/Dropbox"
global master_file_path "${dropbox}/Engel_GFT/replication_files"

* input: intermediate data read by this file. output: where the price-index datasets are saved.
global input "${master_file_path}/data/intermediate_data"
global output "${master_file_path}/data/intermediate_data/conventional_price_indices"

cd "${master_file_path}/data/intermediate_data"
set more off


**************************************************************
****      Part 1: Consumption and unit prices within each round
*Note: Prices are district specific (not districtXdecile specific)
*      Consumption shares vary across districtXdeciles
**************************************************************

*This code creates: R43_district_X_decile_level_V4.dta
if 1==1 {

    * Builds the round-43 district X decile dataset and saves it as
    * R43_district_X_decile_level_V4 in the output folder. One row per
    * state43 X sector X district43 X decile_dist, holding district weights,
    * district prices (raw and filled), democratic budget shares and coverage.
    * NOTE(review): the egen functions xtile() and wtmean() used below are not
    * official Stata egen functions; presumably they come from the egenmore and
    * _gwtmean SSC packages - confirm they are installed.

    *(1) round 43
    set more off

    use "${input}/R43R55/expenditures_item_level_R43R55_i_groupsV1.dta" if round==43, clear

    *drop households with flagged discrepancies between sum of exp. and mpce
    drop if flag_mpce==1

    *drop goods with no quantity data
    drop if quantity==.
    drop if quantity==0

    *Keep only goods in our 34 i groupings:
    drop if flag_deaton_drops==1

    replace concorded_itemlabel2=trim(concorded_itemlabel2)

    *Get rid of products beyond fuel and light:
    *(only round 43 is loaded here, so the round==55 line drops nothing; it is kept parallel to the R55 block)
    drop if round==43 & itemcode43>=480
    drop if round==55 & Srl_no_of_item>=360

    *calculate expenditures and quantities at the hhid X item level:
    collapse (sum) expenditure quantity, by(state43 district43 sector wt hhsize mpce hhid concorded_itemlabel2 i_groupsV1 g_groupsV3)

    *Describe how many articles the average non-urban household buys from the list of goods (132, see below where we tab item_seq)
    preserve
    drop if sector=="Urban"
    gen one=1
    egen items_bought=total(one), by( state43 district43 sector hhid)
    duplicates drop state43 district43 sector hhid, force
    sum items_bought [weight=wt]
    display "Average number of NSS items bought in R43 is " r(mean)
    restore

    *Unit price = expenditure / quantity, rounded to 2 decimals
    gen price=expenditure/quantity
    replace price=round(price, 0.01)

    *Angus' automatic test for unit price outliers:
    *drop observations whose log price is more than 2 sd away from the item mean.
    *Observations with a missing log price (e.g. price rounded to 0) are dropped too,
    *because Stata treats a missing value as larger than any number.
    gen log_price=log(price)
    egen sd_log_price=sd(log_price), by(concorded_itemlabel2)
    egen mean_log_price=mean(log_price), by(concorded_itemlabel2)
    gen diff=abs(log_price-mean_log_price)
    gen compare=0
    replace compare=1 if diff>2*sd_log_price
    drop if compare==1
    drop compare diff mean_log_price sd_log_price log_price

    *Median unit price per item at the state X sector level:
    bysort concorded_itemlabel2 state43 sector: egen price_state=median(price)
    replace price_state=round(price_state, 0.01)

    *Median unit price per item at the district X sector level:
    bysort concorded_itemlabel2 state43 district43 sector: egen price_district=median(price)
    replace price_district=round(price_district, 0.01)

    *Numeric item and group identifiers (item_seq becomes the reshape suffix below)
    egen item_seq = group(concorded_itemlabel2), label
    egen i_groupsV1_seq=group(i_groupsV1), label

    *This tells us the number of NSS items we work with (after deaton drops, merges, concordances)
    tab item_seq

    *Save the item_seq to label/group lookup in a tempfile (not used again in this block)
    preserve
    duplicates drop item_seq concorded_itemlabel2 i_groupsV1 g_groupsV3, force
    tempfile item_seq
    save `item_seq'
    restore

    keep expenditure quantity price_state price_district state43 sector district43 hhid wt hhsize mpce item_seq

    *Household total expenditure over the retained items:
    egen sum_exp=total(expenditure), by(state43 sector district43 hhid)

    *One row per household, one set of columns per item:
    reshape wide expenditure quantity price_state price_district , i(state43 district43 sector wt hhsize mpce hhid sum_exp ) j(item_seq)

    *Fill in with 0 in case of no purchases:
    foreach var of varlist expenditure* quantity* {
        replace `var'=0 if `var'==.
    }

    *create household budget shares (suffix = item_seq, taken after the 11-character stub):
    foreach var of varlist expenditure* {
        loc a=substr("`var'",12,.)
        gen share_`a'=(`var')/sum_exp
    }

    egen cover_hh=rowtotal(share_*)
    egen deaton_exp=rowtotal(expenditure*)
    gen deaton_exp_pc=deaton_exp/hhsize

    *Create district level budget shares (sharedist_* are not kept in the saved file):
    *Weighted total expenditure by state, sector and district:
    bysort state43 sector district43: egen tot_exp_dist=total(sum_exp*wt)

    *Weighted expenditure on each good by state, sector and district:
    foreach var of varlist expenditure* {
        loc a=substr("`var'",12,.)
        egen exp_`a'=total(`var'*wt) ,  by(state43 sector district43)
        gen sharedist_`a'=exp_`a'/tot_exp_dist
    }
    drop exp_*

    *create 9 deciles of income for each districtXsector and for each stateXsector (for democratic CPI) (centered at 6-15, 16-25, etc.):
    *the lowest and highest of 20 quantile groups are excluded first, then the rest is cut into 9 groups.
    *decile_st is computed but not kept in the saved file.
    egen decile_st_bounds= xtile(sum_exp), by(state43 sector) nq(20)
    egen decile_st= xtile(sum_exp) if decile_st_bounds!=1 & decile_st_bounds!=20, by(state43 sector) nq(9)
    drop decile_st_bounds

    egen decile_dist_bounds= xtile(sum_exp), by(state43 sector district43) nq(20)
    egen decile_dist= xtile(sum_exp) if decile_dist_bounds!=1 & decile_dist_bounds!=20, by(state43 sector district43) nq(9)
    drop decile_dist_bounds

    *Calculate democratic shares at the districtXdecile level:
    *(state43 is spelled out; the former abbreviation state only worked with varabbrev on)
    foreach var of varlist share_* {
        loc a=substr("`var'",7,.)
        *Democratic weights district X decile level:
        bysort state43 sector district43 decile_dist: egen ave_share_decile_dist_`a'=wtmean(`var'), weight(wt)
    }

    *fill district level prices (spread each item price to every household row of the district):
    foreach var of varlist price_district* {
        loc a=substr("`var'",15,.)
        bysort state43 sector district43: egen price_dist_`a'=max(`var')
        cap drop `var'
    }

    *fill state level prices:
    foreach var of varlist price_state* {
        loc a=substr("`var'",12,.)
        *can use the max since they are all the same:
        bysort state43 sector : egen price_st_`a'=max(`var')
        cap drop `var'
    }

    egen dist_wt=total(wt), by(state43 sector district43)

    *Calculate weighted average share of outlays on deaton goods by decile district:
    gen coverage_hhd=deaton_exp_pc/mpce
    bysort state43 sector district43 decile_dist: egen coverage_decile_dist=wtmean(coverage_hhd), weight(wt)
    *Cap coverage at 1. The missing() guard keeps a missing coverage missing:
    *without it the >= comparison is true for missing values and would set them to 1.
    replace coverage_decile_dist=1 if coverage_decile_dist>=1 & !missing(coverage_decile_dist)

    *districtXdecile dataset:
    duplicates drop state43 sector district43 decile_dist, force
    drop if decile_dist==.
    keep state43 sector district43 decile_dist dist_wt price_st_* price_dist_* ave_share_decile_dist_*  coverage_decile_dist

    *Fill in state level prices with national prices if needed.
    *national prices (mean of the state prices over the rows of each sector):
    foreach var of varlist price_st_* {
        loc a=substr("`var'",10,.)
        bysort sector : egen price_nat_`a'=mean(`var')
        replace `var'=price_nat_`a' if `var'==.
    }

    *Fill in district prices with state level prices above:
    foreach var of varlist price_dist_* {
        loc a=substr("`var'",12,.)
        gen price_fill_dist_`a'=price_dist_`a'
        replace price_fill_dist_`a'=price_st_`a' if  price_fill_dist_`a'==.
    }

    drop price_nat_* price_st_*

    save "$output/R43_district_X_decile_level_V4",  replace

}



*This code creates: R55_district_X_decile_level_V4.dta
if 1==1 {

    * Builds the round-55 district X decile dataset and saves it as
    * R55_district_X_decile_level_V4 in the output folder. One row per
    * state43 X sector X district43 X decile_dist, holding district weights,
    * district prices (raw and filled), democratic budget shares and coverage.
    * NOTE(review): the egen functions xtile() and wtmean() used below are not
    * official Stata egen functions; presumably they come from the egenmore and
    * _gwtmean SSC packages - confirm they are installed.

    *(1) round 55
    set more off

    use "${input}/R43R55/expenditures_item_level_R43R55_i_groupsV1.dta" if round==55, clear
    *For this round the household size is taken from Household_size:
    drop hhsize
    rename Household_size hhsize

    *drop households with flagged discrepancies between sum of exp. and mpce
    drop if flag_mpce==1

    *drop goods with no quantity data
    drop if quantity==.
    drop if quantity==0

    *Keep only goods in our 34 i groupings:
    drop if flag_deaton_drops==1

    replace concorded_itemlabel2=trim(concorded_itemlabel2)

    *Make list of goods consistent over rounds:
    *note: other cereals only drops in R55:
    drop if concorded_itemlabel2=="Other cereals"

    *Get rid of products beyond fuel and light:
    *(only round 55 is loaded here, so the round==43 line drops nothing; it is kept parallel to the R43 block)
    drop if round==43 & itemcode43>=480
    drop if round==55 & Srl_no_of_item>=360

    *Rescale quantities by 1/100 and round to 2 decimals
    *NOTE(review): presumably a unit conversion to match round 43 - confirm
    replace quantity=quantity/100
    replace quantity=round(quantity, 0.01)

    *calculate expenditures and quantities at the hhid X item level:
    collapse (sum) expenditure quantity, by(state43 district43 sector wt hhsize mpce hhid concorded_itemlabel2 i_groupsV1 g_groupsV3)

    *Unit price = expenditure / quantity, rounded to 2 decimals
    gen price=expenditure/quantity
    replace price=round(price, 0.01)

    *Angus' automatic test for unit price outliers:
    *drop observations whose log price is more than 2 sd away from the item mean.
    *Observations with a missing log price (e.g. quantity or price rounded to 0) are dropped too,
    *because Stata treats a missing value as larger than any number.
    gen log_price=log(price)
    egen sd_log_price=sd(log_price), by(concorded_itemlabel2)
    egen mean_log_price=mean(log_price), by(concorded_itemlabel2)
    gen diff=abs(log_price-mean_log_price)
    gen compare=0
    replace compare=1 if diff>2*sd_log_price
    drop if compare==1
    drop compare diff mean_log_price sd_log_price log_price

    *Median unit price per item at the state X sector level:
    bysort concorded_itemlabel2 state43 sector: egen price_state=median(price)
    replace price_state=round(price_state, 0.01)

    *Median unit price per item at the district X sector level:
    bysort concorded_itemlabel2 state43 district43 sector: egen price_district=median(price)
    replace price_district=round(price_district, 0.01)

    *Numeric item and group identifiers (item_seq becomes the reshape suffix below)
    egen item_seq = group(concorded_itemlabel2), label
    egen i_groupsV1_seq=group(i_groupsV1), label

    *This tells us the number of NSS items we work with (after deaton drops, merges, concordances)
    tab item_seq

    *Save the item_seq to label/group lookup in a tempfile (not used again in this block)
    preserve
    duplicates drop item_seq concorded_itemlabel2 i_groupsV1 g_groupsV3, force
    tempfile item_seq
    save `item_seq'
    restore

    keep expenditure quantity price_state price_district state43 sector district43 hhid wt hhsize mpce item_seq

    *Household total expenditure over the retained items:
    egen sum_exp=total(expenditure), by(state43 sector district43 hhid)

    *One row per household, one set of columns per item:
    reshape wide expenditure quantity price_state price_district , i(state43 district43 sector wt hhsize mpce hhid sum_exp) j(item_seq)

    *Fill in with 0 in case of no purchases:
    foreach var of varlist expenditure* quantity* {
        replace `var'=0 if `var'==.
    }

    *create household budget shares (suffix = item_seq, taken after the 11-character stub):
    foreach var of varlist expenditure* {
        loc a=substr("`var'",12,.)
        gen share_`a'=(`var')/sum_exp
    }

    egen cover_hh=rowtotal(share_*)

    egen deaton_exp=rowtotal(expenditure*)
    gen deaton_exp_pc=deaton_exp/hhsize

    *Create district level budget shares (sharedist_* are not kept in the saved file):
    *Weighted total expenditure by state, sector and district:
    bysort state43 sector district43: egen tot_exp_dist=total(sum_exp*wt)

    *Weighted expenditure on each good by state, sector and district:
    foreach var of varlist expenditure* {
        loc a=substr("`var'",12,.)
        egen exp_`a'=total(`var'*wt) ,  by(state43 sector district43)
        gen sharedist_`a'=exp_`a'/tot_exp_dist
    }
    drop exp_*

    *create 9 deciles of income for each districtXsector and for each stateXsector (for democratic CPI) (centered at 6-15, 16-25, etc.):
    *the lowest and highest of 20 quantile groups are excluded first, then the rest is cut into 9 groups.
    *decile_st is computed but not kept in the saved file.
    egen decile_st_bounds= xtile(sum_exp), by(state43 sector) nq(20)
    egen decile_st= xtile(sum_exp) if decile_st_bounds!=1 & decile_st_bounds!=20, by(state43 sector) nq(9)
    drop decile_st_bounds

    egen decile_dist_bounds= xtile(sum_exp), by(state43 sector district43) nq(20)
    egen decile_dist= xtile(sum_exp) if decile_dist_bounds!=1 & decile_dist_bounds!=20, by(state43 sector district43) nq(9)
    drop decile_dist_bounds

    *Calculate democratic shares at the districtXdecile level:
    *(state43 is spelled out; the former abbreviation state only worked with varabbrev on)
    foreach var of varlist share_* {
        loc a=substr("`var'",7,.)
        *Democratic weights district X decile level:
        bysort state43 sector district43 decile_dist: egen ave_share_decile_dist_`a'=wtmean(`var'), weight(wt)
    }

    *fill district level prices (spread each item price to every household row of the district):
    foreach var of varlist price_district* {
        loc a=substr("`var'",15,.)
        bysort state43 sector district43: egen price_dist_`a'=max(`var')
        cap drop `var'
    }

    *fill state level prices:
    foreach var of varlist price_state* {
        loc a=substr("`var'",12,.)
        *can use the max since they are all the same:
        bysort state43 sector : egen price_st_`a'=max(`var')
        cap drop `var'
    }

    egen dist_wt=total(wt), by(state43 sector district43)

    *Calculate weighted average share of outlays on deaton goods by decile district:
    gen coverage_hhd=deaton_exp_pc/mpce
    bysort state43 sector district43 decile_dist: egen coverage_decile_dist=wtmean(coverage_hhd), weight(wt)
    *Cap coverage at 1. The missing() guard keeps a missing coverage missing:
    *without it the >= comparison is true for missing values and would set them to 1.
    replace coverage_decile_dist=1 if coverage_decile_dist>=1 & !missing(coverage_decile_dist)

    *districtXdecile dataset:
    duplicates drop state43 sector district43 decile_dist, force
    drop if decile_dist==.
    keep state43 sector district43 decile_dist dist_wt  coverage_decile_dist price_st_* price_dist_* ave_share_decile_dist_*

    *Fill in state level prices with national prices if needed.
    *national prices (mean of the state prices over the rows of each sector):
    foreach var of varlist price_st_* {
        loc a=substr("`var'",10,.)
        bysort sector : egen price_nat_`a'=mean(`var')
        replace `var'=price_nat_`a' if `var'==.
    }

    *Fill in district prices with state level prices above:
    foreach var of varlist price_dist_* {
        loc a=substr("`var'",12,.)
        gen price_fill_dist_`a'=price_dist_`a'
        replace price_fill_dist_`a'=price_st_`a' if  price_fill_dist_`a'==.
    }

    drop price_nat_* price_st_*

    save "$output/R55_district_X_decile_level_V4",  replace

}






**************************************************************
****        Part 2: Construct decileXdistrict CPI: across rounds
****
*State level (Paasche and Laspeyres, plutocratic and democratic)
*District level (Paasche and Laspeyres, plutocratic and democratic)
*StateXdecile (Paasche and Laspeyres, democratic)
*DistrictXdecile (Paasche and Laspeyres, democratic)
**************************************************************


**************************************************************
*DistrictXDecile level (Paasche and Laspeyres, democratic), R43, R55
**************************************************************
if 1==1 {

    * Builds democratic Laspeyres and Paasche price indices at the
    * district X decile level between the initial round (43) and the final
    * round (55), once with raw district prices and once with filled prices,
    * and saves them as CPI_district_X_decile_level_V4 in the output folder.

    local round1 "43"
    local round2 "55"
    forvalues i=1/1 {
        local initial: word `i' of `round1'
        local final: word `i' of `round2'

        set more off
        use  "$output/R`final'_district_X_decile_level_V4", clear

        *Prefix every non-key variable of the final round with its round number.
        *NOTE(review): the range relies on the column order of the saved file and on
        *item 132 being the last item; dist_wt and coverage_decile_dist are expected
        *to fall inside the range, since their prefixed names are kept below - confirm.
        foreach var of varlist ave_share_decile_dist_1- price_fill_dist_132 {
            rename `var' _`final'_`var'
        }

        *File name uses V4, matching the name under which the round files are saved
        *(the former lower-case v4 fails on case-sensitive file systems).
        merge 1:1 state43 sector district43 decile_dist using "$output/R`initial'_district_X_decile_level_V4"
        foreach var of varlist ave_share_decile_dist_1- price_fill_dist_132 {
            rename `var' _`initial'_`var'
        }
        *Keep only district X decile cells observed in both rounds:
        keep if _merge==3
        drop _merge

        *relative prices (final over initial), item by item:
        foreach var of varlist _`initial'_price_dist_* {
            loc a=substr("`var'",16,.)
            gen price_`final'_`initial'_i`a'= _`final'_price_dist_`a'/`var'
        }

        *relative prices (filled in list):
        foreach var of varlist _`initial'_price_fill_dist_* {
            loc a=substr("`var'",21,.)
            gen price_fill_`final'_`initial'_i`a'= _`final'_price_fill_dist_`a'/`var'
        }

        *District X decile level Laspeyres, democratic:
        *sum over items of relative price times initial-round share.
        *rowtotal with the missing option skips missing items and returns missing
        *only when every item is missing.
        preserve

        foreach var of varlist price_`final'_`initial'_i* {
            loc a=substr("`var'",14,.)
            gen cpi_i`a'_`initial'_`final'=`var'*_`initial'_ave_share_decile_dist_`a'
        }
        egen Lcpi_`initial'_`final'_decile=rowtotal(cpi_i*_`initial'_`final'), mis

        *Now for filled in prices
        foreach var of varlist price_fill_`final'_`initial'_i* {
            loc a=substr("`var'",19,.)
            gen cpi_fill_i`a'_`initial'_`final'=`var'*_`initial'_ave_share_decile_dist_`a'
        }
        egen Lcpi_fill_`initial'_`final'_decile=rowtotal(cpi_fill_i*_`initial'_`final'), mis

        keep state43 sector district43  _*_dist_wt  decile_dist _*_coverage_decile_dist Lcpi_`initial'_`final'_decile Lcpi_fill_`initial'_`final'_decile

        tempfile Lcpi_`initial'_`final'_decile
        save `Lcpi_`initial'_`final'_decile'

        restore

        *District X decile level Paasche, democratic:
        *inverse of the sum over items of final-round share divided by relative price.

        foreach var of varlist price_`final'_`initial'_i* {
            loc a=substr("`var'",14,.)
            gen cpi_i`a'_`initial'_`final'=(1/`var')*_`final'_ave_share_decile_dist_`a'
        }
        egen Pcpi_`initial'_`final'_decile_x=rowtotal(cpi_i*_`initial'_`final'), mis
        gen Pcpi_`initial'_`final'_decile=1/Pcpi_`initial'_`final'_decile_x
        drop Pcpi_`initial'_`final'_decile_x
        drop cpi_i*

        *Now using filled in prices:
        foreach var of varlist price_fill_`final'_`initial'_i* {
            loc a=substr("`var'",19,.)
            gen cpi_fill_i`a'_`initial'_`final'=(1/`var')*_`final'_ave_share_decile_dist_`a'
        }
        egen Pcpi_fill_`initial'_`final'_decile_x=rowtotal(cpi_fill_i*_`initial'_`final'), mis
        gen Pcpi_fill_`initial'_`final'_decile=1/Pcpi_fill_`initial'_`final'_decile_x
        drop Pcpi_fill_`initial'_`final'_decile_x

        keep state43 sector district43  _*_dist_wt  decile_dist _*_coverage_decile_dist Pcpi_`initial'_`final'_decile Pcpi_fill_`initial'_`final'_decile

        tempfile Pcpi_`initial'_`final'_decile
        save `Pcpi_`initial'_`final'_decile'
    }

    *Assemble districtXdecile CPIs:
    *(the tempfile locals created inside the loop are still in scope here)

    use `Lcpi_43_55_decile',clear
    merge 1:1 state43 sector district43 decile_dist using `Pcpi_43_55_decile'
    drop _merge

    save "$output/CPI_district_X_decile_level_V4", replace

*end if 1==1:
}


